#!/usr/bin/env python
# coding=utf-8

# Build the docs with docker!
#
# Step 1 is to build a docker image based on Bitnami's minideb.
# Step 2 is to translate the arguments that build_docs.pl supports into
# a list of arguments to be passed to start the docker container and a
# list of arguments to be passed to the build_docs.pl process that is
# started in the docker container.
# Step 3 is to start the docker container. We start it in such a way
# that it *should* remove itself when it is done.
#
# I'm aware that this is a fairly non-standard way to use docker but
# for the most part all we want docker for is to get us into a consistent
# environment so it'll do the trick. At least for now.
#
# The shebang line on this script has a story too! As written it targets
# "whatever version of python you have in the command line". Originally
# we wanted to target just python2 because that is a nice "lowest common
# denominator". But macOS doesn't have a `python2` executable, only
# `python`. So we have to shebang for `python`. PEP 394 says that if we
# do that we must make the script compatible with `python3` *and*
# `python2`. And it doesn't just say that to be dictatorial! Arch Linux's
# `python` command *is* `python3`. So, if we want to be compatible with
# Arch, we have to support both. And we'd prefer that this script be
# compatible with everything.

from __future__ import print_function

import logging
from os import environ, getgid, getuid
from os.path import basename, dirname, exists, expanduser, isdir
from os.path import join, normpath, realpath
import re
import subprocess
from sys import platform, version_info
import time
import webbrowser

# Docker image tags are built by docker_tag() below. They should line up
# with the tags in preview/Dockerfile and publish_docker.sh
# How long `docker image build` may run before build_docker_image starts
# logging its output instead of spooling it.
DOCKER_BUILD_QUIET_TIME = 3  # seconds
# Included in the error raised when `ssh-add -L` reports no identities.
SSH_AGENT_HELP = 'https://help.github.com/en/articles/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent#adding-your-ssh-key-to-the-ssh-agent'  # noqa

# Directory containing this script, with symlinks resolved. It is used as
# the docker build context and is mounted at /docs_build in the container.
DIR = dirname(realpath(__file__))
logger = logging.getLogger('build_docs')
# Enable Docker's fancy buildkit
environ['DOCKER_BUILDKIT'] = '1'


def docker_tag(name):
    """Return the full tag of the docs docker image called `name`.

    The tag is `name` under the docker.elastic.co/docs/ path, pinned to
    the `latest` version.
    """
    return ''.join(['docker.elastic.co/docs/', name, ':latest'])


def build_docker_image(name):
    """Build the docker image for the `name` target and tag it.

    Runs `docker image build --target <name>` with DIR as the build context
    and tags the result with docker_tag(name).

    The output of the build is spooled up and hidden if the command is
    successful *and* finishes within DOCKER_BUILD_QUIET_TIME seconds. Once
    the build has been running longer than that, everything spooled so far
    is logged and every later line is logged as it arrives. If the build
    fails, whatever is still spooled is logged as an error.

    Raises:
        subprocess.CalledProcessError: if the docker build exits non-zero.
    """
    docker_logger = logging.getLogger('docker build')
    # I know hiding the output is a little sneaky, but folks run this from
    # the command line so frequently that they'll appreciate cleaner output.
    # I hope.
    start_logging_at = time.time() + DOCKER_BUILD_QUIET_TIME
    acc = []

    def handle_line(line):
        if time.time() < start_logging_at:
            # Still inside the quiet period: spool the line for later.
            acc.append(line)
            return
        if acc:
            docker_logger.info(
                'Building the docker image that will build the docs. ' +
                'Expect this to take somewhere between a couple seconds ' +
                'and five minutes.')
        # Use a distinct loop variable here. Reusing `line` would rebind the
        # parameter, log the last spooled line twice, and drop the current
        # line.
        for spooled in acc:
            docker_logger.info(spooled)
        del acc[:]
        docker_logger.info(line)

    cmd = [
        "docker", "image", "build",
        "--target", name,
        "--tag", docker_tag(name),
        DIR]
    build = common_popen(cmd)
    handle_popen(build, handle_line)
    if build.returncode != 0:
        # Anything still spooled was never shown, so show it now.
        for spooled in acc:
            docker_logger.error(spooled)
        raise subprocess.CalledProcessError(build.returncode, cmd)


class OpenBrowserState:
    """Tracks the build's output to decide when to open the browser.

    The browser is opened once, as soon as every service we are waiting
    on has announced itself in the output.
    """

    def __init__(self):
        self.opened = False
        # nginx is always required. Parcel and the preview server are only
        # waited for after building_single_book() is called.
        self.need_nginx = True
        self.need_parcel = False
        self.need_preview = False

    def building_single_book(self):
        """
        If we're building a single book we need to wait for Parcel to finish
        building the CSS and JS for the first time so we can be sure its server
        is up and running. If we're building all books we run parcel as part
        of the build before starting nginx so we don't have to wait for it.
        """
        self.need_preview = True
        self.need_parcel = True

    def handle_line(self, line):
        """Inspect one line of output and open the browser when ready.

        Returns True if the line should be shown to the user and False if
        it should be hidden. After the browser has been opened every line
        is shown.
        """
        if self.opened:
            return True

        if 'start worker processes' in line:
            # Wait for nginx or nothing will be listening
            self.need_nginx = False

        # Wait for parcel to finish building for the first time or the
        # server might not yet be up and nginx will fail to proxy to it.
        parcel_built = '✨  Built in ' in line
        if parcel_built:
            self.need_parcel = False

        preview_listening = 'preview server is listening on 3000' in line
        if preview_listening:
            self.need_preview = False

        still_waiting = (self.need_nginx or
                         self.need_parcel or
                         self.need_preview)
        if not still_waiting:
            if platform == "darwin" and 'BROWSER' not in environ:
                # On mac webbrowser seems to want to default to safari which
                # is weird. If we tell it that the browser's name is `open`
                # we'll get whatever the user set as their default browser.
                environ['BROWSER'] = 'open'
            webbrowser.open('http://localhost:8000/guide',
                            new=1, autoraise=False)
            self.opened = True

        # The parcel and preview announcements are the only lines we hide.
        return not (parcel_built or preview_listening)


def run_build_docs(args):
    """Run build_docs.pl inside the `build` docker image.

    Translates the arguments that build_docs.pl supports into a list of
    arguments for `docker run` (mounts, published ports, environment
    variables) and a list of arguments for the build_docs.pl process inside
    the container, with host paths rewritten to the paths where they are
    mounted. Then starts the container and relays its output to the log.

    Args:
        args: the command line arguments, without the program name.

    Returns:
        The exit code of the `docker run` process.

    Raises:
        ArgError: if an argument is malformed, is missing its value, or
            points at something that doesn't exist.
        subprocess.CalledProcessError: if a helper `git` command fails.
    """
    docker_args = standard_docker_args()
    build_docs_args = ['--in_standard_docker']

    mounted_doc_repo_roots = set()
    mounted_doc_repo_names = set()
    mounted_alternates = set()

    def mount_docs_repo_and_dockerify_path(path_in_repo, path):
        """Adds a mount for the root of the repository into the docker
        container and rewrites the path so that it is inside that mount.

        If the repo happens to already be mounted we won't add two mounts.
        """
        repo_root = subprocess.check_output(
                ['git', 'rev-parse', '--show-toplevel'],
                cwd=path_in_repo)
        repo_root = repo_root.decode('utf-8').strip()
        repo_name = basename(repo_root)
        if 'x-pack-' in repo_name:
            # x-pack-foo repositories expect to be mounted in the foo-extra
            # directory so we should oblige them so they'll build. This
            # information is sort of available in conf.yaml but it is
            # mixed into each book even though it is a property of the
            # repository. Thus we hard code it here. Sad, but much less
            # complex than digging it out of conf.yaml. Prior to the build_docs
            # folks had to check the repos out into a particular directory
            # structure but build_docs is host-machine directory structure
            # agnostic and we like it that way.
            extra_name = repo_name.replace('x-pack-', '')
            repo_mount = '/doc/%s-extra/%s' % (extra_name, repo_name)
        else:
            repo_mount = '/doc/' + repo_name
        if repo_root not in mounted_doc_repo_roots:
            if repo_name in mounted_doc_repo_names:
                raise ArgError("Can't mount two repos with the same " +
                               "name [%s]" % repo_name)
            mounted_doc_repo_roots.add(repo_root)
            mounted_doc_repo_names.add(repo_name)
            docker_args.extend([
                '-v',
                '%s:%s:ro,cached' % (repo_root, repo_mount)
            ])
            mount_alternates(path_in_repo)
        if not path.startswith(repo_root):
            cmd = 'git rev-parse --show-toplevel'
            err = ("provided path doesn't contain `%s`:\n"
                   "    path: %s\n"
                   "toplevel: %s\n"
                   "This may be caused by case insensitive path matching") % \
                (cmd, path, repo_root)
            raise ArgError(err)
        return repo_mount + path[len(repo_root):]

    def mount_alternates(path_in_repo):
        """Mounts every git alternate of the repository that contains
        path_in_repo, following alternates of alternates recursively.
        """
        # We'd prefer to use --absolute-git-dir here but not all users have it
        git_dir = subprocess.check_output(
                ['git', 'rev-parse', '--git-dir'],
                cwd=path_in_repo)
        git_dir = git_dir.decode('utf-8').strip()
        git_dir = normpath(join(path_in_repo, git_dir))
        alternates_file = git_dir + '/objects/info/alternates'
        if not exists(alternates_file):
            return

        with open(alternates_file, 'r') as f:
            for line in f.readlines():
                # Sadly, the alternates have to be mounted in the docker image
                # in *exactly* the same spot where they are in the host
                # filesystem or else git will not be able to find them!
                alternates_root = line.strip()
                if not alternates_root:
                    # A blank line would produce an empty mount and make
                    # the recursive git call fail on an empty cwd.
                    continue
                if alternates_root in mounted_alternates:
                    continue
                mounted_alternates.add(alternates_root)
                docker_args.extend([
                    '-v',
                    '%s:%s:ro,cached' % (alternates_root, alternates_root)
                ])
                mount_alternates(alternates_root)

    # Either False or an OpenBrowserState once we've seen --open.
    open_browser = False
    args = Args(args)
    saw_doc = False
    saw_out = False
    should_forward_ssh_auth_into_container = False
    arg = args.next_arg()
    while arg is not None:
        # Every argument is passed through to build_docs.pl. Arguments that
        # take a host path also get the rewritten, in-container path
        # appended below instead of the original value.
        build_docs_args.append(arg)
        if arg == '--all':
            if 'SSH_AUTH_SOCK' not in environ:
                raise ArgError('--all requires an agent to be running and ' +
                               'SSH_AUTH_SOCK to be set')
            if not platform.startswith('linux'):
                # Off of linux the auth socket is not mounted directly.
                # Instead we ssh into the container and forward the agent.
                # See forward_ssh_auth_into_container.
                docker_args.extend(['--tmpfs', '/run/nginx'])
                docker_args.extend(['--tmpfs', '/root'])
                docker_args.extend(['--publish', '8022:22/tcp'])
                docker_args.extend(
                        ['-e', 'SSH_AUTH_SOCK=/tmp/forwarded_ssh_auth'])
                should_forward_ssh_auth_into_container = True
            else:
                auth_sock = realpath(environ['SSH_AUTH_SOCK'])
                auth_sock_dir = dirname(auth_sock)
                docker_args.extend([
                        '-v',
                        '%s:%s:ro' % (auth_sock_dir, auth_sock_dir)
                ])
                docker_args.extend(['-e', 'SSH_AUTH_SOCK=%s' % auth_sock])
            known_hosts = realpath('%s/.ssh/known_hosts' % environ['HOME'])
            if exists(known_hosts):
                # If we have known_hosts mount them into the container so it
                # won't ask about github
                docker_args.extend([
                        '-v',
                        '%s:/tmp/.ssh/known_hosts:ro,cached' % known_hosts])
        elif arg == '--alternatives':
            alternatives = args.next_arg_or_err()
            split = alternatives.split(':')
            if len(split) != 3:
                # Report the value we were given. This used to reference an
                # undefined name and crashed with a NameError instead.
                err = "--alternatives must be " + \
                      "<source_lang>:<dest_lang>:<path> but was %s" % \
                      alternatives
                raise ArgError(err)
            alternatives_dir = realpath(expanduser(split[2]))
            if not isdir(alternatives_dir):
                raise ArgError(
                    "Can't find --alternatives %s" % alternatives_dir
                )
            mounted = mount_docs_repo_and_dockerify_path(
                    alternatives_dir, alternatives_dir)
            build_docs_args.append("%s:%s:%s" % (split[0], split[1], mounted))
        elif arg == '--conf':
            conf_file = realpath(args.next_arg_or_err())
            if not exists(conf_file):
                raise ArgError("Can't find --conf %s" % conf_file)
            docker_args.extend(['-v', '%s:/conf.yaml:delegated' % conf_file])
            build_docs_args.append('/conf.yaml')
        elif arg == '--doc':
            doc_file = realpath(args.next_arg_or_err())
            if not exists(doc_file):
                raise ArgError("Can't find --doc %s" % doc_file)
            build_docs_args.append(mount_docs_repo_and_dockerify_path(
                    dirname(doc_file), doc_file))
            saw_doc = True
        elif arg == '--open':
            docker_args.extend(['--publish', '8000:8000/tcp'])
            docker_args.extend(['--publish', '8001:8001/tcp'])
            open_browser = OpenBrowserState()
        elif arg == '--out':
            out_dir = realpath(args.next_arg_or_err())
            # Mount the parent of the output directory and point the build
            # at the directory's name inside of that mount.
            docker_args.extend(['-v', '%s:/out:delegated' % dirname(out_dir)])
            build_docs_args.append('/out/%s' % basename(out_dir))
            saw_out = True
        elif arg == '--push':
            author_name = from_env_or_git_config(
                    'GIT_AUTHOR_NAME', 'user.name')
            author_email = from_env_or_git_config(
                    'GIT_AUTHOR_EMAIL', 'user.email')
            committer_name = from_env_or_git_config(
                    'GIT_COMMITTER_NAME', 'user.name')
            committer_email = from_env_or_git_config(
                    'GIT_COMMITTER_EMAIL', 'user.email')
            docker_args.extend([
                    '-e', 'GIT_AUTHOR_NAME=%s' % author_name,
                    '-e', 'GIT_AUTHOR_EMAIL=%s' % author_email,
                    '-e', 'GIT_COMMITTER_NAME=%s' % committer_name,
                    '-e', 'GIT_COMMITTER_EMAIL=%s' % committer_email,
            ])
        elif arg == '--reference':
            reference_dir = realpath(args.next_arg_or_err())
            if not exists(reference_dir):
                raise ArgError("Can't find --reference %s" % reference_dir)
            docker_args.extend(['-v',
                                '%s:/reference:ro,cached' % reference_dir])
            build_docs_args.append('/reference')
        elif arg == '--resource':
            resource_dir = realpath(args.next_arg_or_err())
            if not isdir(resource_dir):
                raise ArgError("Can't find --resource %s" % resource_dir)
            build_docs_args.append(mount_docs_repo_and_dockerify_path(
                    resource_dir, resource_dir))
        elif arg == '--sub_dir':
            sub = args.next_arg_or_err()
            m = re.match('(?P<repo>[^:]+):(?P<branch>[^:]+):(?P<dir>.+)', sub)
            if not m:
                raise ArgError("Invalid --sub_dir %s" % sub)
            sub_dir = realpath(expanduser(m.group('dir')))
            if not exists(sub_dir):
                raise ArgError("Can't find --sub_dir %s" % sub_dir)
            mounted_path = mount_docs_repo_and_dockerify_path(sub_dir, sub_dir)
            build_docs_args.append("%s:%s:%s" % (
                    m.group('repo'), m.group('branch'), mounted_path))
        arg = args.next_arg()

    # Mount alternates used by the docs repo if there are any in case we have
    # to use git on the docs repo itself.
    mount_alternates(DIR)

    if saw_doc and not saw_out:
        # If you don't specify --out then we dump the output into
        # $pwd/html_docs to keep backwards compatibility with build_docs.pl.
        docker_args.extend(['-v', '%s:/out:delegated' % realpath('.')])
        build_docs_args.extend(['--out', "/out/html_docs"])

    if platform == "darwin" and saw_doc and open_browser:
        # Docker for Mac has trouble watching filesystem events on a mounted
        # volume, so Parcel's watcher was not triggering rebuilds. Using
        # polling is more expensive and requires a higher ulimit, but
        # resolves the issue. See:
        # - https://github.com/docker/for-mac/issues/2216
        # - https://github.com/parcel-bundler/parcel/issues/2539
        docker_args.extend([
            "-e", "CHOKIDAR_USEPOLLING=1",
            "--ulimit", "nofile=90000:90000"
        ])

    cmd = []
    cmd.extend(['docker', 'run'])
    cmd.extend(docker_args)
    cmd.extend([docker_tag('build'), '/docs_build/build_docs.pl'])
    cmd.extend(build_docs_args)
    docker_run = common_popen(cmd)

    if open_browser and saw_doc:
        open_browser.building_single_book()

    def handle_line(line):
        show = True
        if 'Server running at http://localhost:1234' in line:
            show = False
        if open_browser:
            show = show and open_browser.handle_line(line)
        if show:
            logger.info(line)
        if 'failed: port is already allocated.' in line:
            logger.error('Another process has a port we need. Is there '
                         'already a docs build running with --open?')
        if should_forward_ssh_auth_into_container:
            # The container announces when it is ready to have ssh auth
            # forwarded and which container to forward it into.
            match = re.match('Waiting for ssh auth to be forwarded to (.+)',
                             line)
            if match:
                forward_ssh_auth_into_container(match.group(1), docker_run)

    handle_popen(docker_run, handle_line)
    return docker_run.returncode


def forward_ssh_auth_into_container(container, docker_run):
    """Forwards the authorized keys into the container by sshing into it.

    This requires a few steps:
    1. copy your public key into the docker container's authorized_keys and
       start sshd.
    2. ssh into the container, forwarding your SSH_AUTH_SOCK
    3. chmod the auth socket so it is usable by the docs user
    4. symlink it into the spot where the perl script expects it to be

    Once that is done we write `ready` to the stdin of docker_run.

    Args:
        container: the container to set up, as accepted by `docker exec`.
        docker_run: the Popen of the running `docker run` process.

    Raises:
        ArgError: if the ssh agent has no identities.
        subprocess.CalledProcessError: if any of the steps fail.
    """
    ssh_setup_logger = logging.getLogger('ssh setup')

    def log_line(line):
        ssh_setup_logger.info(line)

    ssh_setup_logger.info('Getting public key to forward to container')
    public_keys = []

    def record_public_key(public_key):
        public_keys.append(public_key)

    cmd = ['ssh-add', '-L']
    get_ssh_public_key = common_popen(cmd)
    handle_popen(get_ssh_public_key, record_public_key)
    keys = "\n".join(public_keys) + "\n"
    if get_ssh_public_key.returncode != 0:
        # stderr is merged into stdout so on failure `keys` contains the
        # error message rather than any keys.
        if 'The agent has no identities.' in keys:
            raise ArgError('--all requires an identity be registered with the '
                           'ssh-agent on macOS. See ' + SSH_AGENT_HELP + ' '
                           'for instructions.')
        raise subprocess.CalledProcessError(get_ssh_public_key.returncode,
                                            cmd, keys)

    ssh_setup_logger.info('Setting up ssh on container')
    cmd = [
        'docker', 'exec',
        # Override the default user back to root instead of the docs user
        '-u', '0:0',
        '-i',
        container,
        'bash', '-c',
        'mkdir -m700 /root/.ssh && ' +
        'cat > /root/.ssh/authorized_keys && ' +
        'chmod 600 /root/.ssh/authorized_keys &&' +
        'service ssh start'
    ]
    setup_ssh = common_popen(cmd)
    # The `cat` in the command above reads the keys from stdin until we
    # close it.
    setup_ssh.stdin.write(encode(keys))
    setup_ssh.stdin.close()
    line = decode(setup_ssh.stdout.readline()).rstrip()
    ssh_setup_logger.info(line)
    setup_ssh.wait()
    if (setup_ssh.returncode != 0 or
            line != 'Starting OpenBSD Secure Shell server: sshd.'):
        # Log whatever else the command had to say before failing.
        handle_popen(setup_ssh, log_line)
        raise subprocess.CalledProcessError(setup_ssh.returncode, cmd)

    ssh_setup_logger.info(
            "Forwarding auth socket. IMPORTANT - If this process takes more " +
            "than a few seconds it has hung and should be manually aborted. " +
            "I'm very sorry.")
    cmd = [
        'ssh',
        '-o', 'StrictHostKeyChecking=no',
        '-o', 'UserKnownHostsFile=/dev/null',
        '-o', 'LogLevel=QUIET',
        '-AT', 'root@localhost',
        '-p', '8022'
    ]
    setup_auth_socket = common_popen(cmd)
    setup_auth_socket.stdin.write(
            encode('ln -s $SSH_AUTH_SOCK /tmp/forwarded_ssh_auth\n'))
    setup_auth_socket.stdin.write(
            encode('chmod 777 -R $(dirname $SSH_AUTH_SOCK)\n'))
    setup_auth_socket.stdin.write(
            encode('echo Done! SSH should work in the container now\n'))
    setup_auth_socket.stdin.flush()
    # We intentionally leave stdin open here to keep the auth socket in place
    # for the duration of the run.
    line = decode(setup_auth_socket.stdout.readline())
    while line:
        line = line.rstrip()
        ssh_setup_logger.info(line)
        if line == 'Done! SSH should work in the container now':
            docker_run.stdin.write(encode('ready\n'))
            docker_run.stdin.flush()
            return
        line = decode(setup_auth_socket.stdout.readline())
    # The ssh connection terminated without giving our signal that it set up
    # the forwarding successfully. So lets log it and tell the user
    # we've failed.
    # We've failed, so there is no reason to keep stdin open any more. Close
    # it and wait for ssh so that returncode is populated. Without the wait
    # it is None and the failure would be reported with exit status 0.
    setup_auth_socket.stdin.close()
    setup_auth_socket.wait()
    raise subprocess.CalledProcessError(setup_auth_socket.returncode, cmd)


class Args:
    """A cursor over the command line arguments.

    The arguments are normalized on construction so `--foo=bar` is seen as
    the two arguments `--foo` and `bar`.
    """

    def __init__(self, args):
        normalized = []
        for raw in args:
            # Single dash arguments like `-foo` aren't supported
            if re.match('^-[^-]+$', raw):
                raise ArgError('Use [-%s] instead of [%s]' % (raw, raw))

            # `--foo=bar` becomes `--foo bar`. More than one `=` is an error.
            pieces = raw.split('=')
            if len(pieces) > 2:
                raise ArgError('Invalid argument [%s]' % raw)
            normalized += pieces
        self.args = normalized
        self.current = 0

    def next_arg(self):
        """Return the next argument and advance, or None if we're done."""
        position = self.current
        if position < len(self.args):
            self.current = position + 1
            return self.args[position]
        return None

    def next_arg_or_err(self):
        """Return the next argument, raising ArgError if there isn't one.

        The error names the argument that came right before so the user
        can tell which option is missing its value.
        """
        flag = self.args[self.current - 1]
        value = self.next_arg()
        if value is not None:
            return value
        raise ArgError("Missing argument for %s" % flag)


def common_popen(cmd):
    """Launch `cmd` as a subprocess that handle_popen can read from.

    stderr is merged into stdout so there is a single stream of output.
    """
    pipe = subprocess.PIPE
    # stdin is a PIPE so that if our process dies the docs build sees its
    # stdin close, which it uses as a signal to die.
    return subprocess.Popen(
        cmd, stdin=pipe, stdout=pipe, stderr=subprocess.STDOUT)


def handle_popen(popen, handle_line):
    """Feed each line of output from a running Popen to `handle_line`
    and then wait for the process to finish.

    Lines are passed along with their trailing whitespace removed.
    """
    # We call readline directly rather than iterating over stdout because
    # iterating buffers the lines into chunks. That makes the output show
    # up in bursts, which isn't pleasant on the command line. Reading a
    # line at a time spits the lines out as they come.
    read_line = popen.stdout.readline
    while True:
        text = decode(read_line())
        if not text:
            # An empty read means the stream has been closed.
            break
        handle_line(text.rstrip())
    popen.wait()


def decode(bytes_or_str):
    """Turn something read from popen's stdout into a str.

    Python 2 already hands us a str so it is returned untouched. Python 3
    hands us bytes which are decoded as utf-8.
    """
    running_python2 = version_info[0] < 3
    return bytes_or_str if running_python2 else bytes_or_str.decode('utf-8')


def from_env_or_git_config(envname, gitname):
    """Look up a setting in the environment, falling back to git config.

    Returns the value of the `envname` environment variable if it is set.
    Otherwise returns the output of `git config <gitname>` with trailing
    whitespace removed. Raises ArgError if the git command fails.
    """
    if envname not in environ:
        try:
            return decode(
                    subprocess.check_output(['git', 'config', gitname])
            ).rstrip()
        except subprocess.CalledProcessError:
            m = "specify the [%s] environment variable or configure [%s] in git"
            raise ArgError(m % (envname, gitname))
    return environ[envname]


def encode(str_to_write):
    """Turn a str into something that can be written to popen's stdin.

    Python 2 can write the str as it is so it is returned untouched.
    Python 3 needs bytes so the str is encoded as utf-8.
    """
    running_python2 = version_info[0] < 3
    return str_to_write if running_python2 else str_to_write.encode('utf-8')


def standard_docker_args():
    """Build the `docker run` arguments shared by every container that
    this script starts.

    Raises ArgError if the current user is root.
    """
    uid = getuid()
    if uid == 0:
        raise ArgError("This process isn't likely to suceed if run as root")

    docker_args = [
        # Remove the container immediately when we're done building the docs
        '--rm',
        # Create files as the current user because that is what folks that
        # use build_docs.pl expect.
        '--user', '%d:%d' % (uid, getgid()),
        # Mount the docs build code so we can run it!
        '-v', '%s:/docs_build:cached' % DIR,
        # Seccomp adds a *devastating* performance overhead if you happen
        # to have it installed.
        '--security-opt', 'seccomp=unconfined',
        # Keep stdin open so the docs build can use closing it as a signal
        # that it needs to die.
        '-i',
    ]

    # Pass the node name into the docker image if it is set
    if 'NODE_NAME' in environ:
        docker_args += ['-e', 'NODE_NAME=%s' % environ['NODE_NAME']]

    # Ritual to make nginx run (even with -t) as the mapped user
    nginx_dirs = (
        '/run/nginx',
        '/var/log/nginx',
        '/var/lib/nginx/body',
        '/var/lib/nginx/fastcgi',
        '/var/lib/nginx/proxy',
        '/var/lib/nginx/uwsgi',
        '/var/lib/nginx/scgi',
    )
    for nginx_dir in nginx_dirs:
        docker_args += ['--tmpfs', nginx_dir]
    return docker_args


class ArgError(Exception):
    """Raised when the script can't proceed with what it was given.

    The command line entry point prints the message and exits with
    status 1.
    """


# Command line entry point. There are three modes:
#   --docker-run <image> <args>  build <image> and run <args> inside of it
#   --docker-build <image>       just build <image>
#   anything else                build the `build` image and pass the
#                                arguments to build_docs.pl inside of it
if __name__ == '__main__':
    # Import `exit` from sys instead of relying on the builtin of the same
    # name. The builtin is installed by the `site` module and is missing
    # when python is started with -S.
    from sys import argv, exit, stdout
    try:
        logging.basicConfig(level=logging.INFO)
        # Don't assume that there is a first argument. Running without any
        # arguments used to die with an IndexError.
        mode = argv[1] if len(argv) > 1 else None
        if '--docker-run' == mode:
            if len(argv) < 4:
                raise ArgError(
                    'Usage: build_docs --docker-run <image> <args>')
            image = argv[2]
            cwd = realpath('.')
            if not cwd.startswith(DIR):
                raise ArgError(
                    '--docker-run must be invoked from within the repo')
            # Run in the container directory that corresponds to the
            # directory we were started from.
            docker_cwd = '/docs_build/' + cwd[len(DIR):]
            build_docker_image(image)
            cmd = ['docker', 'run']
            cmd.extend(standard_docker_args())
            cmd.extend(['--workdir', docker_cwd])
            if stdout.isatty():
                cmd.append('-t')
            cmd.append(docker_tag(image))
            cmd.extend(argv[3:])
            returncode = subprocess.call(cmd)
            exit(returncode)
        if '--docker-build' == mode:
            if len(argv) != 3:
                raise ArgError(
                    'Usage: build_docs --docker-build <image>')
            image = argv[2]
            build_docker_image(image)
            exit(0)

        build_docker_image("build")
        exit(run_build_docs(argv[1:]))
    except ArgError as e:
        print(e)
        exit(1)
    except subprocess.CalledProcessError as e:
        print(e)
        if e.output:
            print(e.output)
        # Some failures are raised with a returncode of 0 or None. Never
        # report those as a success.
        exit(e.returncode or 1)
    except KeyboardInterrupt:
        # Just quit if we get ctrl-c
        exit(1)
